library(ggplot2)
library(dplyr)
library(readr)
library(treemap)
library(ggrepel)
library(plotly)
library(knitr)
library(kableExtra)

Business Question 1

What is the average rating and total number of reviews for each product?

Business_Question_1 <- read_csv("Business Question 1.csv")

# Select top 15 products 
top_15_products <- Business_Question_1 %>%
  mutate(score = (avg_rating * 0.6) + (total_reviews / max(total_reviews) * 0.4) * 5) %>%
  arrange(desc(score)) %>%
  slice_head(n = 15)

# Scatter plot with top 15 products
Business_Question_1_Plot <- ggplot(top_15_products, aes(x = total_reviews, y = avg_rating)) +
  geom_point(color = "blue", size = 4) +
  geom_text_repel(
    aes(label = product_name), 
    size = 2.5,
    max.overlaps = Inf,
    box.padding = 0.5,
    point.padding = 0.5
  ) +
  labs(
    title = "Product Reviews vs Ratings (Top 15)",
    x = "Number of Reviews", 
    y = "Rating"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )

(Business_Question_1_Plot)

Business_Question_1_Table <- kable(top_15_products, caption = "Top 15 Products Based on Rating and Review Score")

Business_Question_1_Table
Top 15 Products Based on Rating and Review Score
brand product_name avg_rating total_reviews score
bareMinerals Original Loose Powder Mineral Foundation Broad Spectrum SPF 15 4.5 19000 4.700000
Anastasia Beverly Hills Brow Wiz 4.5 14000 4.173684
Benefit Cosmetics They re Real Lengthening Volumizing Mascara 4.0 15000 3.978947
Anastasia Beverly Hills Modern Renaissance Eye Shadow Palette 5.0 8000 3.842105
Anastasia Beverly Hills DIPBROW Pomade 4.5 10000 3.752632
beautyblender the original beautyblender 4.5 8000 3.542105
bareMinerals Matte Loose Powder Mineral Foundation Broad Spectrum SPF 15 4.5 7000 3.436842
bareMinerals Mineral Veil Setting Powder 4.5 5000 3.226316
BECCA Shimmering Skin Perfector Pressed Highlighter 4.5 5000 3.226316
Benefit Cosmetics Boi ing Cakeless Concealer 4.5 5000 3.226316
Benefit Cosmetics The POREfessional Face Primer 4.0 7000 3.136842
Benefit Cosmetics The POREfessional Face Primer Mini 4.0 7000 3.136842
Anastasia Beverly Hills Clear Brow Gel 4.5 4000 3.121053
Anastasia Beverly Hills Brow Powder Duo 4.5 4000 3.121053
bareMinerals Broad Spectrum Concealer 4.5 4000 3.121053

Business Question 2

Which brands have the highest number of products priced above $50?

Business_Question_2 <- read_csv("Business Question 2.csv") %>%
  slice_max(order_by = luxury_products, n = 15)

Business_Question_2_Plot <- ggplot(Business_Question_2, aes(x = reorder(brand, luxury_products), y = luxury_products)) +
  geom_col(fill = "darkorange") +
  coord_flip() +
  labs(
    title = "Number of Luxury Products per Brand (Top 15)",
    x = "Brand",
    y = "Number of Products"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )

(Business_Question_2_Plot)

Business_Question_2_Table <- kable(Business_Question_2, caption = "Top 15 Brands with the Highest Number of Luxury Products")
Business_Question_2_Table
Top 15 Brands with the Highest Number of Luxury Products
brand luxury_products
TOM FORD 135
Dior 81
Jo Malone London 63
CHANEL 50
La Mer 46
Yves Saint Laurent 46
Perricone MD 44
Est e Lauder 41
Peter Thomas Roth 41
Atelier Cologne 39
Dr Barbara Sturm 39
Fresh 39
Guerlain 39
Gucci 38
Givenchy 37

Business Question 3

List all limited edition products with their prices and ratings

Business_Question_3 <- read_csv("Business Question 3.csv") %>%
  slice_max(order_by = price, n = 15)

Business_Question_3_Plot <- ggplot(Business_Question_3, aes(x = price, y = rating)) +
  geom_point(color = "purple", size = 4) +
  geom_text_repel(
    aes(label = product_name), 
    size = 2.5,
    max.overlaps = Inf,
    box.padding = 0.5,
    point.padding = 0.5
  ) +
  labs(
    title = "Limited Edition Products: Price vs Rating (Top 15)",
    x = "Price",
    y = "Rating"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )

(Business_Question_3_Plot)

Business_Question_3_Table <- kable(Business_Question_3, caption = "Top 15 Limited Edition Products with Their Prices and Ratings")

Business_Question_3_Table
Top 15 Limited Edition Products with Their Prices and Ratings
product_name price rating
SpectraLite Faceware Pro Wrinkle Reducing Duo 435 1.0
Trinity Eye and Lip Enhancer Attachment Bundle 429 3.5
Supersonic Hair Dryer Gift Edition with Red Case 399 4.0
Supersonic Hair Dryer Limited Edition Gift Set 399 3.5
Supersonic Hair Dryer 23 75 Karat Gold 399 4.5
Timeless Beauty Icons Bojagi Set 324 0.0
CAXA 280 0.0
GloPRO Microneedling Facial Regeneration Tool Set 249 4.5
Deep Scarlet Platinum Styler Set 249 5.0
Cobalt Blue Platinum Styler Set Upbeat Collection 249 0.0
Glam Glow Liquid Lights Vault 235 4.0
Bella Sofia Look Set 230 4.5
Pitera Hydrating Essence Set 229 5.0
Flight Essentials Kit 225 3.0
Personal Microderm PRO and PMD Clean Holiday Exclusive Bundle 219 4.5

Business Question 4

What is the total number of online only products in each category?

Business_Question_4 <- read_csv("Business Question 4.csv") %>%
  slice_max(order_by = online_only_count, n = 15)

Business_Question_4_Plot <- ggplot(Business_Question_4, aes(x = reorder(category, online_only_count), y = online_only_count)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(
    title = "Online-Only Products per Category (Top 15)",
    x = "Category",
    y = "Number of Online-Only Products"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )

(Business_Question_4_Plot)

Business_Question_4_Table <- kable(Business_Question_4, caption = "Top 15 Categories with the Highest Number of Online-Only Products")

Business_Question_4_Table
Top 15 Categories with the Highest Number of Online-Only Products
category online_only_count
Value Gift Sets 172
Face Serums 119
Perfume 113
Moisturizers 88
Candles Home Scents 77
Shampoo 61
Hair Styling Products 59
Body Lotions Body Oils 50
Perfume Gift Sets 50
Beauty Supplements 50
Face Wash Cleansers 50
Mini Size 49
Conditioner 47
Rollerballs Travel Size 39
Face Masks 39
Lotions Oils 39

Business Question 5

Identify brand performance tiers based on average product rating, review count, and price point

Business_Question_5 <- read_csv("Business Question 5.csv")

Business_Question_5_Plot <- ggplot(Business_Question_5, aes(x = reorder(brand_tier, -avg_rating), fill = brand_tier)) +
  geom_bar() +
  labs(
    title = "Number of Brands by Performance Tier",
    x = "Performance Tier",
    y = "Count of Brands"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.margin = margin(1, 1, 1, 1, "cm"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

(Business_Question_5_Plot)

Business_Question_5_Table_Data <- read_csv("Business Question 5.csv") %>%
  slice_max(order_by = avg_rating, n = 15)

Business_Question_5_Table <- kable(Business_Question_5_Table_Data, caption = "Brand Performance Tiers")
Business_Question_5_Table
Brand Performance Tiers
brand avg_rating total_reviews avg_price product_count brand_tier rating_percentile_rank review_percentile_rank price_percentile_rank
Montblanc 5.000000 222 66.6667 6 Mid Tier 99.07 17.96 75.23
Aether Beauty 5.000000 452 47.3333 6 Mid Tier 99.07 26.93 56.66
Fable Mane 5.000000 18 28.8000 5 Value Tier 99.07 3.10 24.77
Four Sigmatic 5.000000 110 24.0000 2 Value Tier 99.07 12.07 12.69
Golde 4.875000 45 27.2500 4 Value Tier 98.76 5.88 20.12
RODIN olio lusso 4.812500 77 128.6250 8 Mid Tier 98.45 10.22 93.81
The Art of Shaving 4.700000 620 43.3333 15 Mid Tier 98.14 33.13 51.08
Paco Rabanne 4.687500 625 85.3750 8 Premium Tier 97.52 33.44 86.38
SOBEL SKIN Rx 4.687500 503 73.5000 8 Mid Tier 97.52 30.03 78.95
Valentino 4.666667 469 92.6667 3 Premium Tier 96.59 27.86 89.47
StackedSkincare 4.666667 498 84.6667 9 Premium Tier 96.59 29.72 85.76
dae 4.666667 121 25.3333 3 Value Tier 96.59 13.00 15.79
Carolina Herrera 4.625000 761 86.4167 12 Premium Tier 96.28 38.08 86.69
Better Not Younger 4.607143 356 33.1429 14 Value Tier 95.98 22.60 33.75
Proactiv 4.590909 1954 45.2727 22 Mid Tier 95.67 55.73 55.11

Business Question 6

Find products that outperform their brand’s average in both ratings and price efficiency (rating-to-price ratio)

Business_Question_6 <- read_csv("Business Question 6.csv")

# Filter top-performing products
top_performers <- Business_Question_6 %>%
  filter(rating > brand_avg_rating, rating_per_dollar > brand_avg_rating_per_dollar) %>%
  mutate(performance_score = (rating - brand_avg_rating) + 
           (rating_per_dollar - brand_avg_rating_per_dollar)) %>%
  arrange(desc(performance_score)) %>%
  slice(1:10)  # Reduced to top 10 for better readability

Business_Question_6_Plot <- ggplot(top_performers, aes(x = reorder(product_name, performance_score), y = performance_score)) +
  geom_col(aes(fill = category)) +
  geom_text_repel(
    aes(label = brand), 
    size = 2.5,
    box.padding = 0.5,  # Space between label and bar
    point.padding = 0.5,  # Space around the label
    nudge_x = 0.1,  # Slight horizontal shift to avoid overlap
    direction = "y",  # Keep labels aligned vertically
    hjust = 1,  # Adjust horizontal justification
    segment.color = "grey50"  # Color of the connecting line
  ) +
  coord_flip() +
  labs(
    title = "Top Products Outperforming Brand Averages",
    subtitle = "Products exceeding rating and price efficiency metrics",
    x = "Product Name",
    y = "Performance Score",
    fill = "Category"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 10),
    plot.margin = margin(1, 2, 1, 1, "cm"),  # Increase right margin for label space
    legend.position = "bottom",  # Move legend to the bottom
    axis.text.y = element_text(size = 8)  # Adjust the font size for the y-axis labels (product names)
  ) +
  scale_x_discrete(expand = c(0.2, 0))  # Adjusts spacing on x-axis to prevent cutting off

(Business_Question_6_Plot)

# Create and display the table with smaller dimensions using kableExtra
Business_Question_6_Table <- kable(top_performers, caption = "Top Performing Products Based on Rating and Price Efficiency") %>%
  kable_styling(
    font_size = 7,  # Smaller font size
    position = "center",  # Center the table
    bootstrap_options = c("striped", "hover")  # Add striped rows and hover effect
  ) %>%
  column_spec(1, width = "3cm") %>%  # Adjust the width of the first column (product names)
  column_spec(2, width = "3cm") %>%  # Adjust the width of the second column (performance score)
  column_spec(3, width = "3cm") %>%  # Adjust the width of the third column (category)
  row_spec(0, bold = TRUE)  # Make the header bold

# Display the table
Business_Question_6_Table
Top Performing Products Based on Rating and Price Efficiency
product_name brand category rating brand_avg_rating rating_diff_percentage price rating_per_dollar brand_avg_rating_per_dollar category_rating_percentile category_efficiency_percentile number_of_reviews performance_score
Snag Free Hair Elastics SEPHORA COLLECTION Hair Accessories 4.5 3.857692 16.65 4 112.50 33.69 57.14 100.00 1000 79.45231
Soft Touch Cotton Pads SEPHORA COLLECTION Makeup Removers 4.5 3.857692 16.65 4 112.50 33.69 53.85 94.87 328 79.45231
Mini Dramatically Different Moisturizing Lotion CLINIQUE Skincare 4.0 3.921610 2.00 5 80.00 16.01 18.00 98.00 1000 64.06839
Pencil Sharpener NARS Mirrors Sharpeners 4.5 4.114035 9.38 6 75.00 15.22 50.00 87.50 836 60.16596
Foot Mask SEPHORA COLLECTION Hand Cream Foot Cream 4.5 3.857692 16.65 5 90.00 33.69 52.00 100.00 956 56.95231
Quench Intense Hydration Mask Saturday Skin Face Masks 4.5 4.466667 0.75 6 75.00 25.09 40.34 93.70 773 49.94333
Eye Mask SEPHORA COLLECTION Eye Masks 4.0 3.857692 3.69 5 80.00 33.69 25.00 91.67 827 46.45231
Eye Mask Grape Smoothing SEPHORA COLLECTION Eye Masks 4.0 3.857692 3.69 5 80.00 33.69 25.00 91.67 827 46.45231
Hand Mask SEPHORA COLLECTION Hand Cream Foot Cream 4.0 3.857692 3.69 5 80.00 33.69 20.00 92.00 394 46.45231
The Microdelivery Exfoliating Facial Wash philosophy Face Wash Cleansers 4.5 3.982143 13.00 8 56.25 12.45 38.71 99.60 3000 44.31786

Business Question 7

Which products contribute the most to their brand’s total revenue in each category?

Business_Question_7 <- read_csv("Business Question 7.csv") %>%
  slice_max(order_by = product_revenue, n = 20)

# Horizontal bar chart for better readability
Business_Question_7_Plot <- ggplot(Business_Question_7, aes(x = reorder(brand, -contribution_percentage), y = contribution_percentage, fill = product_name)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Contribution Percentage to Revenue by Products (Stacked Bar Chart)",
    x = "Brand",
    y = "Contribution Percentage (%)",
    fill = "Product"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",  # Position the legend at the bottom
    legend.text = element_text(size = 5.5),  # Smaller legend text
    legend.title = element_text(size = 8),  # Smaller legend title
    legend.key.size = unit(0.25, "cm")  # Adjust the size of the legend keys
  )

(Business_Question_7_Plot)

Business_Question_7_Table <- kable(Business_Question_7, caption = "Top 20 Products Contributing to Brand Revenue")

Business_Question_7_Table
Top 20 Products Contributing to Brand Revenue
product_name brand category product_revenue contribution_percentage
Flowerbomb Viktor Rolf Perfume 660000 59.01
Black Opium Eau de Parfum Yves Saint Laurent Perfume 620000 36.64
Original Loose Powder Mineral Foundation Broad Spectrum SPF 15 bareMinerals Foundation 608000 31.34
Pro Filt r Soft Matte Longwear Foundation FENTY BEAUTY by Rihanna Foundation 525000 31.20
Lock It Foundation KVD Vegan Beauty Foundation 518000 21.27
Blush NARS Blush 510000 22.03
Alpha Beta Extra Strength Daily Peel Dr Dennis Gross Skincare Facial Peels 440000 41.23
Ultra HD Invisible Cover Foundation MAKE UP FOR EVER Foundation 387000 25.09
They re Real Lengthening Volumizing Mascara Benefit Cosmetics Mascara 375000 17.85
Better Than Sex Mascara Too Faced Mascara 375000 15.64
Truth Serum OLEHENRIKSEN Face Serums 370000 25.56
Good Genes All In One Lactic Acid Treatment SUNDAY RILEY Face Serums 366000 31.45
Tattoo Eyeliner KVD Vegan Beauty Eyeliner 357000 14.66
Born This Way Foundation Too Faced Foundation 351000 14.64
100 percent Pure Argan Oil Josie Maran Face Oils 343000 35.31
Modern Renaissance Eye Shadow Palette Anastasia Beverly Hills Eye Palettes 336000 16.27
All Nighter Long Lasting Makeup Setting Spray Jumbo Urban Decay Setting Spray Powder 336000 13.02
Radiant Creamy Concealer NARS Concealer 330000 14.25
24 7 Glide On Eye Pencil Urban Decay Eyeliner 330000 12.78
Veil Mineral Primer Hourglass Face Primer 324000 16.32
Naked3 Palette Urban Decay Eye Palettes 324000 12.55

Business Question 8

Which brands have the highest number of high-rated products (above 4.5) in at least 3 different categories?

Business_Question_8 <- read_csv("Business Question 8.csv") %>%
  slice_max(order_by = categories_with_high_rated, n = 15)

Business_Question_8_Plot <- ggplot(Business_Question_8, aes(x = reorder(brand, categories_with_high_rated), y = categories_with_high_rated)) +
  geom_col(fill = "darkgreen") +
  coord_flip() +
  labs(
    title = "Brands with High-Rated Products in Multiple Categories (Top 15)",
    x = "Brand",
    y = "Number of Categories"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )

(Business_Question_8_Plot)

Business_Question_8_Table <- kable(Business_Question_8, caption = "Top 15 Brands with High-Rated Products in Multiple Categories")
Business_Question_8_Table
Top 15 Brands with High-Rated Products in Multiple Categories
brand categories_with_high_rated
SEPHORA COLLECTION 26
Dior 11
CLINIQUE 10
Kiehl s Since 1851 9
TOM FORD 9
Lanc me 8
Jack Black 8
tarte 7
Shiseido 7
Proactiv 7
Jo Malone London 6
Atelier Cologne 6
Better Not Younger 6
innisfree 6
Yves Saint Laurent 6
CHANEL 6